---
title: "Wave 1 Randomization"
output:
  pdf_document: default
date: "2025-07-21"
---

```{r setup, echo=TRUE, results='hide', message=FALSE, warning=FALSE}
rm(list = ls())

library(dplyr)
library(AER)
library(broom)
library(ggplot2)
library(blockTools)
library(tidyverse)
library(quickblock)
library(data.table)
library(randomizr)
library(gtsummary)
library(jtools)
library(pwr)
library(stargazer)
library(broom)
library(purrr)
library(psych)

```


# Recoding variables and creating scales

```{r}
# Load the wave 1 responses from wave1_nopii.csv
data <- read_csv("wave1_nopii.csv")

# Recode demographics, build the 7-point party scale, impute attitude items,
# and derive the voter-propensity flag and age brackets.
data <- data %>%
  mutate(
    race_ethnicity = as.factor(race_ethnicity),
    gender = as.factor(gender),
    # case_when() treats an NA condition as "no match", so a missing follow-up
    # item simply falls through to the next branch.
    party_scale = case_when(
      party_id == 2 & party_strength_d == 1 ~ 1,   # Strong Democrat
      party_id == 2 & party_strength_d == 2 ~ 2,   # Not very strong Democrat
      party_id == 2 & is.na(party_strength_d) ~ 2, # Democrat, strength missing -> 2
      party_id == 3 & party_closer == 3 ~ 3,       # Lean Democrat
      party_id == 3 & party_closer == 2 ~ 4,       # Independent
      party_id == 4 ~ 4,                           # Something else is an independent
      party_id == 3 & party_closer == 1 ~ 5,       # Lean Republican
      party_id == 1 & party_strength_r == 2 ~ 6,   # Not very strong Republican
      party_id == 1 & is.na(party_strength_r) ~ 6, # Republican, strength missing -> 6
      party_id == 1 & party_strength_r == 1 ~ 7,   # Strong Republican
      TRUE ~ NA_real_                              # Anything else stays missing
    ),
    # Electoral record items: NA and 6 (don't know) both go to the midpoint, 3
    across(
      starts_with("electoral_record_"),
      ~ ifelse(is.na(.x) | .x == 6, 3, .x)
    ),
    # Confidence items: NA goes to the midpoint, 3
    across(starts_with("confidence_"), ~ ifelse(is.na(.x), 3, .x)),
    # 1 when the respondent voted in neither 2022 nor 2018
    low_propensity_voter = as.integer(voted_2022 == 0 & voted_2018 == 0),
    # Age brackets: 18-29, 30-45, 46-64, 65+; anything else is NA
    age_bracket = case_when(
      between(age, 18, 29) ~ 1,
      between(age, 30, 45) ~ 2,
      between(age, 46, 64) ~ 3,
      age >= 65 ~ 4
    )
  )

# Attitude indices: unweighted mean of the four items in each battery
data <- data %>%
  mutate(
    electoral_record_index =
      (electoral_record_1 + electoral_record_2 +
         electoral_record_3 + electoral_record_4) / 4,
    confidence_index =
      (confidence_1 + confidence_2 + confidence_3 + confidence_4) / 4
  )

# Quartile cut points (25th, 50th, 75th percentiles) of electoral_record_index
quantiles <- quantile(
  data$electoral_record_index,
  probs = c(0.25, 0.50, 0.75),
  na.rm = TRUE
)

# Quartile bracket 1-4. case_when() takes the first matching branch, so each
# branch only needs its upper bound; the lower bound is implied by the
# branches above it.
data <- data %>%
  mutate(
    electoral_record_bracket = case_when(
      electoral_record_index <= quantiles[[1]] ~ 1, # at or below the 25th percentile
      electoral_record_index <= quantiles[[2]] ~ 2, # above 25th, up to 50th
      electoral_record_index <= quantiles[[3]] ~ 3, # above 50th, up to 75th
      electoral_record_index > quantiles[[3]] ~ 4   # above the 75th percentile
    )
  )

# Quartile cut points (25th, 50th, 75th percentiles) of confidence_index
quantiles_confidence <- quantile(
  data$confidence_index,
  probs = c(0.25, 0.50, 0.75),
  na.rm = TRUE
)

# Quartile bracket 1-4. case_when() takes the first matching branch, so each
# branch only needs its upper bound; the lower bound is implied by the
# branches above it.
data <- data %>%
  mutate(
    confidence_bracket = case_when(
      confidence_index <= quantiles_confidence[[1]] ~ 1, # at or below the 25th percentile
      confidence_index <= quantiles_confidence[[2]] ~ 2, # above 25th, up to 50th
      confidence_index <= quantiles_confidence[[3]] ~ 3, # above 50th, up to 75th
      confidence_index > quantiles_confidence[[3]] ~ 4   # above the 75th percentile
    )
  )

```


# Randomization  

```{r message=FALSE, warning=FALSE}
# Fix the RNG seed so the assignment below is reproducible.
set.seed(4546)

# Blocking variables, concatenated into a single stratum label per respondent.
# paste() renders NA as the string "NA", so respondents with a missing
# blocking variable form their own strata instead of being dropped.
data <- data %>%
  mutate(block_id = paste(voted_trump, confidence_bracket, electoral_record_bracket,
                          voted_2020, education, age_bracket, sep = "_"))

# Block randomization: within every block, units are split as evenly as
# possible across the three arms, with any remainder allocated at random.
# Drawing each unit independently with replacement would ignore the blocks
# and could leave a block entirely in one arm.
# block_ra() returns a factor; it is converted so `condition` stays character.
data <- data %>%
  mutate(
    condition = as.character(
      randomizr::block_ra(
        blocks = block_id,
        conditions = c("control", "pr", "tool")
      )
    )
  )

```

# Balance check

```{r message=FALSE, warning=FALSE}
# BALANCE CHECKS for continuous variables

# Covariates whose arm-level means are compared. Each name is listed once so
# that no variable is tested and reported twice.
covariates <- c("age_bracket", "voted_trump", "education", "confidence_bracket",
                "age", "party_scale", "electoral_record_bracket", "voted_2020",
                "voted_2022")

# Summarise and test the balance of one covariate across treatment arms.
#
# Arguments:
#   data      - data frame containing a `condition` column and the covariate.
#   covariate - name of the covariate column, given as a string.
#
# Returns a tibble with one row per condition holding:
#   condition - the treatment arm
#   mean      - arm mean of the covariate (NA values removed)
#   p_value   - p-value of a Welch one-way test of equal means across ALL
#               arms, repeated on every row
balance_test <- function(data, covariate) {
  # .data[[...]] looks the column up by its string name
  arm_means <- data %>%
    group_by(condition) %>%
    summarise(mean = mean(.data[[covariate]], na.rm = TRUE))

  # Omnibus test over every arm rather than a t-test on the first two only.
  # oneway.test() defaults to var.equal = FALSE (Welch), which with exactly
  # two arms is the Welch two-sample t-test.
  omnibus <- oneway.test(data[[covariate]] ~ data[["condition"]])

  arm_means %>%
    mutate(p_value = omnibus$p.value)
}

# Apply the function to each covariate. The list is named by covariate so that
# bind_rows(.id = ) labels each row with the variable name rather than its
# position in the list.
results <- lapply(covariates, function(x) balance_test(data, x))
names(results) <- covariates

results_df <- bind_rows(results, .id = "Variable")

## results of balance test
# n = Inf prints every row; the default tibble print stops after 10 rows
print(results_df, n = Inf)

```

```{r message=FALSE, warning=FALSE}
# Balance checks for categorical variables

# Chi-squared test of independence between gender and assigned condition
chisq.test(table(data$gender, data$condition))

# Chi-squared test of independence between race/ethnicity and assigned condition
chisq.test(table(data$race_ethnicity, data$condition))

# Number of respondents assigned to each condition
table(data$condition)

```

```{r}
## Save Randomized List

# Write the full data set, now including block_id and condition, to CSV
write.csv(x = data, file = "wave1_randomized_list.csv")

```


